This code is for the caregiver speech measures from transcripts used in the manuscript.

Load libraries and set theme

library(tidyverse)
library(Hmisc)
library(GGally)
library(ppcor)
library(gridExtra)
library(psych)

# Scatter plot with a linear-model smoother.
# Adapted from https://github.com/ggobi/ggally/issues/139
# (presumably intended as a custom panel function for GGally pair plots).
#
# data:    data frame passed on to ggplot()
# mapping: aesthetic mapping passed on to ggplot()
# ...:     additional arguments forwarded to geom_smooth()
# Returns the ggplot object (points layer first, then the lm fit layer).
my_custom_smooth <- function(data, mapping, ...) {
  scatter_layer <- geom_point(alpha = .4, color = I("black"))
  lm_layer <- geom_smooth(method = "lm", color = I("blue"), ...)

  ggplot(data = data, mapping = mapping) + scatter_layer + lm_layer
}

# Make theme_bw() the default ggplot2 theme; plots below that add
# theme_classic() override it for that plot only.
theme_set(theme_bw())

Read in data

# NOTE about periods of non-tCDS
# gemods refers to when there are designated start/end periods of other-directed speech (ODS); this was captured using gems (@G) using CHAT conventions
# kwalods refers to when ODS was transcribed at the utterance level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was captured per utterance using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods


# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'

# freq
# Frequency measures (tokens, types and their per-minute rates).
# - Drop the unnamed leading column (presumably a row index written by the
#   CSV export). readr < 2.0 names it "X1" and readr >= 2.0 names it "...1",
#   so accept either name instead of erroring when "X1" is absent.
# - Exclude utterance-level ODS ("kwalods") and keep only speech == "all"
#   (see the NOTE comments above).
# - Convert the grouping columns to factors; activity gets an explicit level
#   order so plots and tables list activities consistently.
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
  dplyr::select(-any_of(c("X1", "...1"))) %>%
  filter(activity != "kwalods", speech == "all") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "gemods")
    ),
    id = factor(id),
    language = factor(language),
    speech = factor(speech)
  )


# mlu
# Read the MLU data, convert the grouping columns to factors, and only then
# keep the speech == "all" rows (filtering last leaves every speech level on
# the factor).
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv")
mlu <- mlu %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "ods")
    ),
    id = factor(id),
    language = factor(language),
    speech = factor(speech)
  )
mlu <- mlu %>%
  filter(speech == "all")


# chip
# Caregiver-only data, so there is no speaker column.
# ODS periods are dropped because these measures concern responsiveness to
# the child during periods of tCDS.
chip <- read_csv("./data_demo_lena_transcripts/chip.csv")
chip <- chip %>%
  filter(activity != "ods")
chip <- chip %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac")
    ),
    id = factor(id),
    language = factor(language)
  )

# Inspect the structure of freq (column types and factor levels) after cleaning
str(freq)
## tibble[,12] [3,308 × 12] (S3: tbl_df/tbl/data.frame)
##  $ id           : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
##  $ rectime      : num [1:3308] 11923 11923 31360 31360 21499 ...
##  $ activity     : Factor w/ 7 levels "books","play",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker      : chr [1:3308] "CHI" "ADULTS" "CHI" "ADULTS" ...
##  $ tokens       : num [1:3308] 30 151 35 143 58 588 42 286 33 152 ...
##  $ types        : num [1:3308] 17 70 17 65 17 199 19 53 17 59 ...
##  $ segment_num  : num [1:3308] 12 12 15 15 2 2 11 11 5 5 ...
##  $ language     : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech       : Factor w/ 1 level "all": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dur_min      : num [1:3308] 3.55 3.55 6.57 6.57 4.71 ...
##  $ tokens_permin: num [1:3308] 8.46 42.57 5.32 21.75 12.31 ...
##  $ types_permin : num [1:3308] 4.79 19.73 2.59 9.89 3.61 ...
# Inspect the structure of mlu (column types and factor levels) after cleaning
str(mlu)
## spec_tbl_df[,9] [3,002 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id         : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ activity   : Factor w/ 7 levels "books","play",..: 6 6 5 5 7 7 2 2 6 6 ...
##  $ speaker    : chr [1:3002] "ADULTS" "CHI" "ADULTS" "CHI" ...
##  $ segment_num: num [1:3002] 2 2 2 2 2 2 2 2 3 3 ...
##  $ words_sum  : num [1:3002] 210 66 175 43 11 16 189 47 261 78 ...
##  $ num_utt_sum: num [1:3002] 66 35 64 24 2 12 64 28 87 43 ...
##  $ mlu_w      : num [1:3002] 3.18 1.89 2.73 1.79 5.5 ...
##  $ language   : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ speech     : Factor w/ 2 levels "all","spont": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   activity = col_character(),
##   ..   speaker = col_character(),
##   ..   segment_num = col_double(),
##   ..   words_sum = col_double(),
##   ..   num_utt_sum = col_double(),
##   ..   mlu_w = col_double(),
##   ..   language = col_character(),
##   ..   speech = col_character()
##   .. )
# Inspect the structure of chip (column types and factor levels) after cleaning
str(chip)
## spec_tbl_df[,11] [1,118 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ activity                         : Factor w/ 6 levels "books","play",..: 6 5 2 6 5 4 6 5 4 2 ...
##  $ id                               : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ rectime                          : num [1:1118] 15242 15242 15242 14342 14342 ...
##  $ total_adult_utt                  : num [1:1118] 68 64 65 91 43 13 50 8 65 127 ...
##  $ total_child_utt                  : num [1:1118] 46 34 33 54 17 3 14 1 29 49 ...
##  $ total_adult_resp                 : num [1:1118] 62 51 54 77 24 9 30 4 56 106 ...
##  $ total_adult_imitexp              : num [1:1118] 18 13 15 25 5 2 9 0 16 21 ...
##  $ prop_adultresp_outof_childutt    : num [1:1118] 1.35 1.5 1.64 1.43 1.41 ...
##  $ prop_adult_imitexp_outof_childutt: num [1:1118] 0.391 0.382 0.455 0.463 0.294 ...
##  $ language                         : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ segment_num                      : num [1:1118] 2 2 2 3 3 3 4 4 4 5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   activity = col_character(),
##   ..   id = col_double(),
##   ..   rectime = col_double(),
##   ..   total_adult_utt = col_double(),
##   ..   total_child_utt = col_double(),
##   ..   total_adult_resp = col_double(),
##   ..   total_adult_imitexp = col_double(),
##   ..   prop_adultresp_outof_childutt = col_double(),
##   ..   prop_adult_imitexp_outof_childutt = col_double(),
##   ..   language = col_character(),
##   ..   segment_num = col_double()
##   .. )

Create dfs for ADULTS

# FREQ: keep only the rows whose speaker is "ADULTS"
freq_adult <- filter(freq, speaker == "ADULTS")

# MLU: same restriction to the "ADULTS" speaker rows
mlu_adult <- filter(mlu, speaker == "ADULTS")

FREQ - Boxplots and descriptives for ADULTS

TOKENS (raw)

Freq (tokens, types)

  • excluded non-target children (NTC)
  • excluded overlapping ODS during CDS periods
  • averaged across all adult speakers
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • 0 values are included when the individual does not speak though we know they are present in the activity
# relabel for plots
# Activity codes are replaced by display labels.
# NOTE(review): the language recode maps "English"/"Spanish" back to
# lowercase. On a first top-to-bottom run the levels are already lowercase,
# so this looks like a no-op that only matters when the chunk is re-run after
# the labels have been capitalized further down -- confirm intent.
freq_adult <- mutate(
  freq_adult,
  activity = recode(
    activity,
    "conv" = "unst. conv.",
    "ac" = "adult-centered",
    "gemods" = "non_tcds"
  ),
  language = recode(language, "English" = "english", "Spanish" = "spanish")
)


# Boxplots (with jittered observations) of raw token counts by activity,
# all speech, one stacked panel per language
ggplot(freq_adult, aes(x = activity, y = tokens, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  facet_wrap(~ language, ncol = 1) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "")

# Per-participant, per-activity means of the raw counts (all speech only):
# average tokens and types within each id x activity group, then keep one
# row per id/language/activity combination.
freq_adult_act <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(
    tokens_act = mean(tokens),
    types_act = mean(types)
  ) %>%
  distinct(id, language, activity, tokens_act, types_act)

# language-specific subsets used by the descriptives below
freq_adult_act_en <- filter(freq_adult_act, language == "english")
freq_adult_act_sp <- filter(freq_adult_act, language == "spanish")



# descriptives: per-participant mean raw tokens by activity (english subset)
describeBy(
  freq_adult_act_en$tokens_act,
  freq_adult_act_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n      mean        sd   min       max    range        se
## X11    1          books    1 22 480.90076 272.79828 147.0 1115.7500 968.7500 58.160789
## X12    2           play    1 39 234.99060 132.96267  42.5  714.0000 671.5000 21.291067
## X13    3           food    1 31 145.78495 124.42440  12.0  432.0000 420.0000 22.347282
## X14    4       routines    1 32 135.71823 107.06498   7.0  494.0000 487.0000 18.926592
## X15    5    unst. conv.    1 43 152.50504 100.82982  11.0  382.7500 371.7500 15.376403
## X16    6 adult-centered    1 45  90.56519  60.38539   4.0  295.0000 291.0000  9.001722
## X17    7       non_tcds    1 45 154.69407  96.87180  15.0  418.6667 403.6667 14.440795
# Observation-level min/max of raw tokens per activity (english rows only).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_en_minmax_tokens_raw <- freq_adult %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens, na.rm = TRUE),
    max = max(tokens, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_tokens_raw
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          english     65  1474
## 2 play           english      0   743
## 3 unst. conv.    english      4   817
## 4 routines       english      7   692
## 5 food           english      1   630
## 6 adult-centered english      0   397
## 7 non_tcds       english      0   945
# descriptives: per-participant mean raw tokens by activity (spanish subset)
describeBy(
  freq_adult_act_sp$tokens_act,
  freq_adult_act_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean        sd       min      max    range       se
## X11    1          books    1 20 436.9875 212.49434 77.000000 768.0000 691.0000 47.51518
## X12    2           play    1 37 180.9775 136.02617 25.000000 571.0000 546.0000 22.36256
## X13    3           food    1 31 155.3602 119.25669  6.000000 445.0000 439.0000 21.41913
## X14    4       routines    1 35 166.9405 133.27275  4.000000 635.5000 631.5000 22.52721
## X15    5    unst. conv.    1 43 139.6674 123.38751 13.000000 754.6667 741.6667 18.81642
## X16    6 adult-centered    1 45 106.6911  89.50469 21.500000 426.5000 405.0000 13.34257
## X17    7       non_tcds    1 45 113.3085  82.01389  4.166667 323.5000 319.3333 12.22591
# Observation-level min/max of raw tokens per activity (spanish rows only).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_sp_minmax_tokens_raw <- freq_adult %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens, na.rm = TRUE),
    max = max(tokens, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_tokens_raw
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          spanish     77   927
## 2 play           spanish     15   802
## 3 unst. conv.    spanish      5  1012
## 4 routines       spanish      4   937
## 5 food           spanish      6   566
## 6 adult-centered spanish      0   566
## 7 non_tcds       spanish      0   795

TOKENS (rate per min)

# Capitalize the language labels; code from here on filters freq_adult on
# "English"/"Spanish" rather than the lowercase values.
freq_adult <- mutate(
  freq_adult,
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# Boxplots (with jittered observations) of tokens per minute by activity,
# all speech, one panel per language
ggplot(freq_adult, aes(x = activity, y = tokens_permin, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7),
    panel.spacing = unit(4, "lines")
  ) +
  labs(x = "", y = "Tokens Rate")

# ggsave() is given no plot argument, so it writes the most recent plot
ggsave(
  "./figures/boxplot_tokens_rate2.pdf",
  dpi = 300,
  width = 18,
  height = 8,
  units = "in"
)


# Per-participant, per-activity means of the per-minute rates (all speech
# only): average within each id x activity group, then keep one row per
# id/language/activity combination.
freq_adult_act_permin <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(
    tokens_permin_act = mean(tokens_permin),
    types_permin_act = mean(types_permin)
  ) %>%
  distinct(id, language, activity, tokens_permin_act, types_permin_act)

# language-specific subsets (labels are capitalized at this point)
freq_adult_act_permin_en <- filter(freq_adult_act_permin, language == "English")
freq_adult_act_permin_sp <- filter(freq_adult_act_permin, language == "Spanish")

# descriptives: per-participant mean tokens per minute by activity (English)
describeBy(
  freq_adult_act_permin_en$tokens_permin_act,
  freq_adult_act_permin_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean       sd       min       max     range        se
## X11    1          books    1 22 97.05612 25.83716 32.157566 139.35186 107.19429  5.508500
## X12    2           play    1 39 63.00664 24.36993 19.809892 122.54285 102.73296  3.902312
## X13    3           food    1 31 63.20593 33.34061 17.818695 191.38756 173.56887  5.988150
## X14    4       routines    1 32 70.52775 27.24599 28.267219 157.18563 128.91841  4.816457
## X15    5    unst. conv.    1 43 76.14884 35.36817 10.266822 224.58716 214.32033  5.393596
## X16    6 adult-centered    1 45 85.01633 73.45787 19.605147 534.52116 514.91601 10.950453
## X17    7       non_tcds    1 45 36.98633 21.17975  4.063964  87.65849  83.59453  3.157290
# Observation-level min/max of tokens per minute per activity (English rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_en_minmax_tokens_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens_permin, na.rm = TRUE),
    max = max(tokens_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_tokens_rate
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          English  21.7   149.
## 2 play           English   0     183.
## 3 unst. conv.    English   2.93  225.
## 4 routines       English  21.4   157.
## 5 food           English  12.3   191.
## 6 adult-centered English   0     535.
## 7 non_tcds       English   0     167.
# descriptives: per-participant mean tokens per minute by activity (Spanish)
describeBy(
  freq_adult_act_permin_sp$tokens_permin_act,
  freq_adult_act_permin_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean       sd       min       max     range       se
## X11    1          books    1 20 69.70156 22.42204 33.415438 114.94578  81.53034 5.013720
## X12    2           play    1 37 51.27875 20.41805  8.535673 118.63334 110.09767 3.356707
## X13    3           food    1 31 37.39510 15.81841  9.649060  71.37418  61.72512 2.841070
## X14    4       routines    1 35 59.05829 22.87843 13.861479 118.88422 105.02275 3.867161
## X15    5    unst. conv.    1 43 59.56066 23.87057 24.971949 137.97635 113.00440 3.640229
## X16    6 adult-centered    1 45 56.17319 33.33514 14.119577 161.15476 147.03518 4.969310
## X17    7       non_tcds    1 45 31.05852 17.21041  2.157086  71.16063  69.00355 2.565577
# Observation-level min/max of tokens per minute per activity (Spanish rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_sp_minmax_tokens_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens_permin, na.rm = TRUE),
    max = max(tokens_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_tokens_rate
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          Spanish  33.4  115. 
## 2 play           Spanish   8.54 128. 
## 3 unst. conv.    Spanish  10.4  140. 
## 4 routines       Spanish  13.3  183. 
## 5 food           Spanish   9.65  91.1
## 6 adult-centered Spanish   0    343. 
## 7 non_tcds       Spanish   0    153.

TYPES (raw)

# Boxplots (with jittered observations) of raw type counts by activity,
# all speech, one panel per language
ggplot(freq_adult, aes(x = activity, y = types, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "")

# descriptives: per-participant mean raw types by activity (english subset)
describeBy(
  freq_adult_act_en$types_act,
  freq_adult_act_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n      mean       sd  min      max    range        se
## X11    1          books    1 22 143.90227 66.70063 56.0 277.7500 221.7500 14.220623
## X12    2           play    1 39  85.90256 36.92368 27.5 197.0000 169.5000  5.912521
## X13    3           food    1 31  64.56989 42.01631 12.0 177.0000 165.0000  7.546351
## X14    4       routines    1 32  58.62083 31.98695  7.0 144.0000 137.0000  5.654547
## X15    5    unst. conv.    1 43  65.25039 36.72048 10.0 163.0000 153.0000  5.599821
## X16    6 adult-centered    1 45  44.60630 24.26759  4.0 129.0000 125.0000  3.617598
## X17    7       non_tcds    1 45  79.91481 44.48101 10.5 199.8333 189.3333  6.630837
# Observation-level min/max of raw types per activity (English rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_en_minmax_types_raw <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types, na.rm = TRUE),
    max = max(types, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_types_raw
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          English     38   369
## 2 play           English      0   253
## 3 unst. conv.    English      4   267
## 4 routines       English      7   246
## 5 food           English      1   181
## 6 adult-centered English      0   159
## 7 non_tcds       English      0   351
# descriptives: per-participant mean raw types by activity (spanish subset)
describeBy(
  freq_adult_act_sp$types_act,
  freq_adult_act_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n      mean       sd  min      max    range        se
## X11    1          books    1 20 144.82500 64.74266 44.0 268.5000 224.5000 14.476900
## X12    2           play    1 37  62.33784 35.84100 14.0 157.0000 143.0000  5.892225
## X13    3           food    1 31  65.50538 36.39642  5.0 133.0000 128.0000  6.536989
## X14    4       routines    1 35  65.69762 33.60470  4.0 141.5000 137.5000  5.680232
## X15    5    unst. conv.    1 43  59.88527 32.98391  8.0 160.6667 152.6667  5.030000
## X16    6 adult-centered    1 45  48.42926 26.92731 15.0 121.0000 106.0000  4.014086
## X17    7       non_tcds    1 45  59.29000 36.12099  3.5 137.8333 134.3333  5.384600
# Observation-level min/max of raw types per activity (Spanish rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_sp_minmax_types_raw <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types, na.rm = TRUE),
    max = max(types, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_types_raw
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          Spanish     44   361
## 2 play           Spanish      7   186
## 3 unst. conv.    Spanish      5   219
## 4 routines       Spanish      4   175
## 5 food           Spanish      5   166
## 6 adult-centered Spanish      0   212
## 7 non_tcds       Spanish      0   289

TYPES (rate per min)

# Boxplots (with jittered observations) of types per minute by activity,
# all speech, one panel per language
ggplot(freq_adult, aes(x = activity, y = types_permin, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Types Rate")

# ggsave() is given no plot argument, so it writes the most recent plot
ggsave(
  "./figures/boxplot_types_rate2.pdf",
  dpi = 300,
  width = 18,
  height = 8,
  units = "in"
)


# descriptives: per-participant mean types per minute by activity (English)
describeBy(
  freq_adult_act_permin_en$types_permin_act,
  freq_adult_act_permin_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean       sd       min       max     range        se
## X11    1          books    1 22 32.06383  9.75467 14.809274  48.75449  33.94522  2.079703
## X12    2           play    1 39 27.07891 12.32556 12.245120  66.31838  54.07326  1.973670
## X13    3           food    1 31 39.82161 34.99837  9.280570 191.38756 182.10699  6.285893
## X14    4       routines    1 32 41.75257 25.57367 14.409224 157.18563 142.77640  4.520830
## X15    5    unst. conv.    1 43 41.83628 27.40421  5.648173 176.14679 170.49862  4.179103
## X16    6 adult-centered    1 45 61.14224 75.86224 10.605640 534.52116 523.91552 11.308874
## X17    7       non_tcds    1 45 20.66782 11.41460  2.392211  48.90393  46.51172  1.701588
# Observation-level min/max of types per minute per activity (English rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_en_minmax_types_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types_permin, na.rm = TRUE),
    max = max(types_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_en_minmax_types_rate
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          English   9.89  64.4
## 2 play           English   0    149. 
## 3 unst. conv.    English   2.93 176. 
## 4 routines       English  11.2  157. 
## 5 food           English   8.50 191. 
## 6 adult-centered English   0    535. 
## 7 non_tcds       English   0     92.2
# descriptives: per-participant mean types per minute by activity (Spanish)
describeBy(
  freq_adult_act_permin_sp$types_permin_act,
  freq_adult_act_permin_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean        sd       min       max     range       se
## X11    1          books    1 20 25.19438  9.378178 10.866809  49.60044  38.73363 2.097024
## X12    2           play    1 37 21.98845 11.038304  4.405509  66.43467  62.02916 1.814686
## X13    3           food    1 31 19.09226  8.249124  8.576942  39.99831  31.42137 1.481586
## X14    4       routines    1 35 30.57141 14.693167  9.082726  73.61036  64.52764 2.483598
## X15    5    unst. conv.    1 43 32.41628 16.819376 10.061461 100.19711  90.13565 2.564931
## X16    6 adult-centered    1 45 38.00249 28.213513  7.901010 137.53863 129.63762 4.205822
## X17    7       non_tcds    1 45 18.23642  9.936926  1.666453  43.46532  41.79886 1.481309
# Observation-level min/max of types per minute per activity (Spanish rows).
# The filter must use the capitalized label "Spanish": freq_adult$language was
# recoded from "spanish" to "Spanish" before the rate sections, so the former
# lowercase filter matched no rows and produced an empty tibble (the recorded
# output just below this chunk predates this fix and needs to be regenerated).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
freq_adult_sp_minmax_types_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types_permin, na.rm = TRUE),
    max = max(types_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

freq_adult_sp_minmax_types_rate
## # A tibble: 0 x 4
## # Groups:   activity, language [0]
## # … with 4 variables: activity <fct>, language <fct>, min <dbl>, max <dbl>

MLU

MLU

  • excluded NTC
  • KEPT overlapping ODS during CDS periods
  • averaged across all adult speakers
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • we excluded observations when there were 0 number of utterances, since we cannot calculate an MLU when there are no utterances
# relabel for plots: activity codes become display labels and the language
# labels are capitalized
mlu_adult <- mutate(
  mlu_adult,
  activity = recode(
    activity,
    "conv" = "unst. conv.",
    "ac" = "adult-centered",
    "ods" = "non_tcds"
  ),
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# Boxplots (with jittered observations) of MLU in words by activity,
# one panel per language
ggplot(mlu_adult, aes(x = activity, y = mlu_w, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey", "black"
    )
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "MLUw")

# ggsave() is given no plot argument, so it writes the most recent plot
ggsave(
  "./figures/boxplot_mluw2.pdf",
  dpi = 300,
  width = 18,
  height = 8,
  units = "in"
)


# Per-participant, per-activity mean MLUw (all speech only): average within
# each id x activity group, then keep one row per id/language/activity.
mlu_adult_act <- mlu_adult %>%
  group_by(id, activity) %>%
  mutate(
    mlu_w_act = mean(mlu_w)
  ) %>%
  distinct(id, language, activity, mlu_w_act)

# language-specific subsets used by the descriptives below
mlu_adult_act_en <- filter(mlu_adult_act, language == "English")
mlu_adult_act_sp <- filter(mlu_adult_act, language == "Spanish")

# descriptives: per-participant mean MLUw by activity (English)
describeBy(
  mlu_adult_act_en$mlu_w_act,
  mlu_adult_act_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean        sd      min      max    range        se
## X11    1          books    1 22 4.860163 1.2386474 2.843305 7.287671 4.444366 0.2640805
## X12    2           play    1 39 3.544910 0.7799806 2.117647 5.428571 3.310924 0.1248969
## X13    3           food    1 31 3.508170 0.8406000 2.125000 5.328869 3.203869 0.1509762
## X14    4       routines    1 32 3.665860 0.7357554 2.200000 5.670455 3.470455 0.1300644
## X15    5    unst. conv.    1 43 3.763843 0.8423445 1.500000 5.769231 4.269231 0.1284563
## X16    6 adult-centered    1 45 3.524902 0.7379635 2.248271 5.668605 3.420334 0.1100091
## X17    7       non_tcds    1 45 4.150646 0.8011976 2.542017 5.770249 3.228232 0.1194355
# Observation-level min/max of MLUw per activity (English rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
mlu_adult_en_minmax <- mlu_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(mlu_w, na.rm = TRUE),
    max = max(mlu_w, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

mlu_adult_en_minmax
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 adult-centered English   1     9   
## 2 unst. conv.    English   1     6.67
## 3 non_tcds       English   1     8   
## 4 play           English   1.33  5.69
## 5 routines       English   2.2   8   
## 6 books          English   1.51  8.73
## 7 food           English   1     6.06
# descriptives: per-participant mean MLUw by activity (Spanish)
describeBy(
  mlu_adult_act_sp$mlu_w_act,
  mlu_adult_act_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item         group1 vars  n     mean        sd      min      max    range         se
## X11    1          books    1 20 3.822411 1.3037314 1.864407 6.566855 4.702448 0.29152319
## X12    2           play    1 37 2.698639 0.7463847 1.422360 4.545918 3.123558 0.12270488
## X13    3           food    1 31 2.772200 0.6975939 1.391304 4.230769 2.839465 0.12529156
## X14    4       routines    1 35 3.029701 0.7381221 1.791667 4.485947 2.694281 0.12476540
## X15    5    unst. conv.    1 43 3.054863 0.5741442 2.004357 4.589286 2.584928 0.08755616
## X16    6 adult-centered    1 45 2.756766 0.6920525 1.666667 5.019476 3.352810 0.10316510
## X17    7       non_tcds    1 45 3.499380 0.7220236 1.890000 5.202889 3.312889 0.10763292
# Observation-level min/max of MLUw per activity (Spanish rows).
# mutate() + distinct() leaves one row per activity, ordered by first
# appearance in the data.
mlu_adult_sp_minmax <- mlu_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(mlu_w, na.rm = TRUE),
    max = max(mlu_w, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

mlu_adult_sp_minmax
## # A tibble: 7 x 4
## # Groups:   activity, language [7]
##   activity       language   min   max
##   <fct>          <fct>    <dbl> <dbl>
## 1 books          Spanish   1.86  8.36
## 2 play           Spanish   1.21  6.48
## 3 unst. conv.    Spanish   1     5.33
## 4 routines       Spanish   1.58  5   
## 5 food           Spanish   1.39  4.32
## 6 adult-centered Spanish   1     6.22
## 7 non_tcds       Spanish   1     7.9

CHIP

Responses (RAW and PROP)

CHIP (responses, imitations/expansions; these are utterances that follow a child’s utterance, within a 5 utterance window)

  • excluded NTC
  • excluded overlapping ODS during CDS periods
  • averaged across all adult speakers
  • if an activity was not present, we did not include it (i.e., we did not impute any values to create complete cases)
  • we keep 0 values, which occur when the adults did not respond even though the child did have utterances [i.e., the numerator is 0 but the denominator is nonzero]; if a child did not have utterances, then adult responses could not be calculated, so the number of observations here differs from freq and mlu
  • greater than 1 = caregiver had more utterances in response to the child; less than 1 = child had more utterances than caregiver responses
# create dfs
# chip2 is a plotting copy of chip with display labels for activity and
# capitalized language labels; chip itself is left unchanged.
chip2 <- mutate(
  chip,
  activity = recode(activity, "conv" = "unst. conv.", "ac" = "adult-centered"),
  language = recode(language, "english" = "English", "spanish" = "Spanish")
)


# plot - total adult responses by activity (raw counts), one stacked panel
# per language
ggplot(chip2, aes(x = activity, y = total_adult_resp, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey"
    )
  ) +
  facet_wrap(~ language, ncol = 1) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "")

# plot - proportion of adult responses out of child utterances by activity,
# one panel per language
ggplot(
  chip2,
  aes(x = activity, y = prop_adultresp_outof_childutt, fill = activity)
) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  # reference line at 1: caregiver and child = equal utts
  geom_hline(yintercept = 1) +
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2", "dodgerblue1",
      "darkgoldenrod1", "darkgrey"
    )
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Prop Responses")

# ggsave() is given no plot argument, so it writes the most recent plot
ggsave(
  "./figures/boxplot_prop_resp2.pdf",
  dpi = 300,
  width = 18,
  height = 8,
  units = "in"
)


# Per-participant, per-activity means of the two proportion measures:
# average within each id x activity group, then keep one row per
# id/language/activity combination.
# Built from chip (not the relabelled chip2), so activity and language keep
# their original lowercase codes here.
chip_act <- chip %>%
  group_by(id, activity) %>%
  mutate(
    prop_resp_act = mean(prop_adultresp_outof_childutt),
    prop_imitexp_act = mean(prop_adult_imitexp_outof_childutt)
  ) %>%
  distinct(id, language, activity, prop_resp_act, prop_imitexp_act)

# language-specific subsets used by the descriptives below
chip_act_en <- filter(chip_act, language == "english")
chip_act_sp <- filter(chip_act, language == "spanish")


# descriptives: per-participant mean proportion of responses by activity
# (english subset)
describeBy(
  chip_act_en$prop_resp_act,
  chip_act_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n     mean        sd       min      max    range         se
## X11    1    books    1 22 2.378070 0.7766933 1.1864443 4.366667 3.180222 0.16559157
## X12    2     play    1 36 2.074418 0.6357208 1.1301724 4.666667 3.536494 0.10595346
## X13    3     food    1 29 1.914779 0.5461100 0.8141026 3.066667 2.252564 0.10141009
## X14    4 routines    1 28 1.856904 0.5897762 0.8000000 3.333333 2.533333 0.11145723
## X15    5     conv    1 36 1.866984 0.5430045 0.6250000 2.933333 2.308333 0.09050074
## X16    6       ac    1 22 1.855095 0.5401576 0.9909502 3.248677 2.257727 0.11516198
# Observation-level min/max of the response proportion per activity
# (english rows). mutate() + distinct() leaves one row per activity, ordered
# by first appearance in the data.
chip_adult_en_minmax_propresp <- chip %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adultresp_outof_childutt, na.rm = TRUE),
    max = max(prop_adultresp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

chip_adult_en_minmax_propresp
## # A tibble: 6 x 4
## # Groups:   activity, language [6]
##   activity language   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 ac       english  0      5   
## 2 conv     english  0.571  5   
## 3 play     english  0.667  4.67
## 4 routines english  0.5    3.8 
## 5 books    english  0.859  5   
## 6 food     english  0      5
# descriptives: per-participant mean proportion of responses by activity
# (spanish subset)
describeBy(
  chip_act_sp$prop_resp_act,
  chip_act_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n     mean        sd       min      max    range         se
## X11    1    books    1 20 2.125135 0.4832411 1.0617284 3.055556 1.993827 0.10805599
## X12    2     play    1 31 1.927563 0.6742332 0.8125000 3.500000 2.687500 0.12109586
## X13    3     food    1 30 1.762135 0.7100370 0.3333333 3.314815 2.981481 0.12963442
## X14    4 routines    1 31 1.904239 0.5335071 1.0000000 3.166667 2.166667 0.09582070
## X15    5     conv    1 35 1.818201 0.5605011 0.8823529 3.119048 2.236695 0.09474197
## X16    6       ac    1 25 1.589711 0.6019761 0.7083333 3.750000 3.041667 0.12039523
# Observation-level min/max of the response proportion per activity
# (spanish rows). mutate() + distinct() leaves one row per activity, ordered
# by first appearance in the data.
chip_adult_sp_minmax_propresp <- chip %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adultresp_outof_childutt, na.rm = TRUE),
    max = max(prop_adultresp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

chip_adult_sp_minmax_propresp
## # A tibble: 6 x 4
## # Groups:   activity, language [6]
##   activity language   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 ac       spanish  0      5   
## 2 conv     spanish  0.7    5   
## 3 play     spanish  0.765  5   
## 4 food     spanish  0.333  4   
## 5 routines spanish  1      5   
## 6 books    spanish  1.06   3.06

Imitations/Expansions (RAW and PROP)

# plot: total_adult_imitexp (raw imitations/expansions) by activity, one panel
# per language, panels stacked in a single column
ggplot(chip2, aes(activity, total_adult_imitexp, fill = activity)) +
  # outlier.shape = NA: the jitter layer below already draws every observation,
  # so the boxplot's own outlier markers would show those points a second time
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = .2) +
  # one fill colour per activity level, in factor-level order
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2",
      "dodgerblue1", "darkgoldenrod1", "darkgrey"
    )
  ) +
  facet_wrap(~ language, ncol = 1) +
  theme(
    legend.position = "none", # fill duplicates the x axis, so no legend
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "")

# plot: prop_adult_imitexp_outof_childutt by activity, one panel per language;
# the figure is then written to ./figures/boxplot_prop_imitexp2.pdf
ggplot(chip2, aes(activity, prop_adult_imitexp_outof_childutt, fill = activity)) +
  theme_classic() +
  # outlier.shape = NA: the jitter layer below already draws every observation,
  # so the boxplot's own outlier markers would show those points a second time
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = .3) +
  # one fill colour per activity level, in factor-level order
  scale_fill_manual(
    values = c(
      "darkviolet", "firebrick1", "green2",
      "dodgerblue1", "darkgoldenrod1", "darkgrey"
    )
  ) +
  facet_wrap(~ language) +
  # reference line at a proportion of 1 (original note: caregiver and child =
  # equal utts). NOTE(review): for this measure 1 presumably means as many
  # adult imitations/expansions as child utterances -- confirm.
  geom_hline(yintercept = 1) +
  # theme tweaks come after theme_classic() so they are not overwritten by it
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Prop Imitations/Expansions")

# no plot argument is given, so ggsave() writes the last plot displayed above
ggsave("./figures/boxplot_prop_imitexp2.pdf", dpi = 300, width = 18, height = 8, units = "in")


# descriptives
# prop_imitexp_act split by activity for chip_act_en (presumably the English
# per-activity data -- confirm where chip_act_en is built).
# mat = TRUE returns a single matrix-style table; fast = TRUE limits the output
# to n, mean, sd, min, max, range and se.
describeBy(
  chip_act_en$prop_imitexp_act,
  chip_act_en$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n      mean        sd        min       max     range         se
## X11    1    books    1 22 0.4822402 0.2032150 0.07142857 0.8000000 0.7285714 0.04332558
## X12    2     play    1 36 0.4082746 0.2249434 0.00000000 1.1157407 1.1157407 0.03749056
## X13    3     food    1 29 0.3716799 0.1958653 0.00000000 1.0000000 1.0000000 0.03637128
## X14    4 routines    1 28 0.3527558 0.1764344 0.00000000 0.6746032 0.6746032 0.03334296
## X15    5     conv    1 36 0.3490170 0.1925299 0.00000000 0.8888889 0.8888889 0.03208831
## X16    6       ac    1 22 0.3648964 0.1674221 0.18972991 0.8666667 0.6769368 0.03569452
# Per-activity minimum and maximum of prop_adult_imitexp_outof_childutt, using
# only the English rows of chip.
# mutate() + distinct() (rather than summarise()) collapses each group to one
# row while keeping rows in first-appearance order and leaving the result
# grouped by activity and language.
chip_adult_en_minmax_propimitexp <- chip %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    # na.rm = TRUE: missing proportions are ignored when taking the extremes
    min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE),
    max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

chip_adult_en_minmax_propimitexp
## # A tibble: 6 x 4
## # Groups:   activity, language [6]
##   activity language    min   max
##   <fct>    <fct>     <dbl> <dbl>
## 1 ac       english  0       4   
## 2 conv     english  0       2   
## 3 play     english  0       1.38
## 4 routines english  0       1   
## 5 books    english  0.0714  1   
## 6 food     english  0       1
# Descriptives of prop_imitexp_act split by activity for chip_act_sp
# (presumably the Spanish per-activity data -- confirm where chip_act_sp is
# built).
# mat = TRUE returns a single matrix-style table; fast = TRUE limits the output
# to n, mean, sd, min, max, range and se.
describeBy(
  chip_act_sp$prop_imitexp_act,
  chip_act_sp$activity,
  mat = TRUE,
  fast = TRUE
)
##     item   group1 vars  n      mean        sd    min       max     range         se
## X11    1    books    1 20 0.4052845 0.2414756 0.0625 1.0000000 0.9375000 0.05399559
## X12    2     play    1 31 0.3536352 0.2224772 0.0000 1.1700000 1.1700000 0.03995808
## X13    3     food    1 30 0.3333968 0.2401772 0.0000 1.0000000 1.0000000 0.04385016
## X14    4 routines    1 31 0.3949755 0.2920670 0.0000 1.0322636 1.0322636 0.05245678
## X15    5     conv    1 35 0.3856342 0.2314362 0.0000 0.9523810 0.9523810 0.03911986
## X16    6       ac    1 25 0.3015461 0.1634934 0.0000 0.7603067 0.7603067 0.03269868
# Per-activity minimum and maximum of prop_adult_imitexp_outof_childutt, using
# only the Spanish rows of chip.
# mutate() + distinct() (rather than summarise()) collapses each group to one
# row while keeping rows in first-appearance order and leaving the result
# grouped by activity and language.
chip_adult_sp_minmax_propimitexp <- chip %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    # na.rm = TRUE: missing proportions are ignored when taking the extremes
    min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE),
    max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)

chip_adult_sp_minmax_propimitexp
## # A tibble: 6 x 4
## # Groups:   activity, language [6]
##   activity language   min   max
##   <fct>    <fct>    <dbl> <dbl>
## 1 ac       spanish      0  1.67
## 2 conv     spanish      0  2   
## 3 play     spanish      0  2   
## 4 food     spanish      0  1   
## 5 routines spanish      0  3   
## 6 books    spanish      0  1